/* a_gen_match_data.do *********************************************************

	This do file generates the match data used for soda assignment.

*******************************************************************************/
set more off

// ---------------------------------------------------------------------------
// Load the raw assignment records, attach the student id crosswalk, normalise
// the year variable, and write two datasets:
//   initial_set.dta      - all rounds/statuses for years 1997-2003
//   assign_complete.dta  - round "1" only, excluding status "I" and "R"
// ---------------------------------------------------------------------------

// Stack the main assignment file and the separate assign05 file.
use "$raw_data_bps/BPS assignment data/SY1997-2005/assign.dta", clear
append using "$raw_data_bps/BPS assignment data/SY1997-2005/assign05.dta"

// Where the string geocode is empty, fill it from the numeric geo variable.
replace geocode = string(geo) if missing(geocode)

// The crosswalk is keyed on id. Unmatched master rows are kept; rows that
// exist only in the crosswalk are discarded. _merge is left in the data.
rename newno id
merge m:1 id using "$raw_data_bps/Crosswalks/STIDMAP.dta", keep(master matched)

// Reduce the raw year code to a four-digit year: subtract 30000 once, then
// subtract a further 10000 up to three times while the value is still > 9000.
// NOTE(review): the raw encoding is not documented here - presumably a
// leading-digit prefix on the school year; confirm against the codebook.
replace year = year-30000
forvalues pass = 1/3 {
	replace year = year-10000 if year > 9000
}

keep if inlist(year, 1997,1998,1999,2000,2001,2002,2003)

save "$stata_data_assignment/initial_set.dta", replace

// Restrict to round "1" and drop records with status "I" or "R".
keep if round=="1"
keep if !inlist(status,"I","R")

save "$stata_data_assignment/assign_complete.dta", replace

// ---------------------------------------------------------------------------
// A little interlude with the capacity file.
//
// For every (year, grade) cell this loop reloads assign_complete.dta and
// writes four files under $stata_data_assignment/<year>/ :
//   pcap<grade>.dta                    - one row per program id (pid) with pcap
//   StudentIDInfo<grade>_modified.dta  - the long student x choice data
//   rol<grade>.dta                     - rank-order lists (studentno, pid, pref)
//   pri<grade>.dta                     - priority (prirank) per student x program
// The data in memory are replaced at the top of every iteration, so anything
// not saved inside an iteration is lost.
// ---------------------------------------------------------------------------

local year "1997 1998 1999 2000 2001 2002 2003"
local grade "K1 K0"

foreach y of local year {
	foreach g of local grade {

		use "$stata_data_assignment/assign_complete.dta", clear

		keep if grade=="`g'"
		keep if year==`y'

		di "`g'"
		di "`y'"

		// One row per student x ranked choice. After the reshape, keep only
		// rows with a non-missing choice, except that the first row of each
		// student (lowest pref) is kept even when its choice is missing.
		reshape long ch wch pri pch, i(studentno) j(pref)
		bys studentno (pref) : drop if missing(ch) & _n>1

		// Number of rows (ranked choices) remaining per student.
		bys studentno: gen num_ranked = _N

		// 1 if the student's pref==1 row has wch == "W".
		bys studentno: egen first_walk = max(pref == 1 & wch == "W")

		// How many times the student lists the same school.
		bys studentno ch : gen num = _N

		tab num if sep==string(ch)

		// quite a few rank same school multiple times

		// Find the first-ranked row whose school equals sep. Sorting on
		// sepnomatch puts matching rows (0) first, so [1] is the best-ranked
		// match, if one exists.
		gen sepnomatch = sep!=string(ch)

		bys studentno (sepnomatch pref) : gen seprep = string(ch[1]) if sepnomatch[1]==0
		bys studentno (sepnomatch pref) : gen sepprog = pch[1] if sepnomatch[1]==0

		order sep seprep sepprog
		count if sep!=seprep

		// NOTE(review): this guard tests seprep while the later pid rebuild
		// tests sepprog - confirm which is intended when pch is empty.
		gen pid = sep + sepprog + "00" if !missing(sep,seprep)		// the "00" is to make the program code 3 digits long, like later years.
		bys pid : gen pfirstone=_n==1
		bys studentno : gen sfirstone = _n==1

		// Attach the East flag by numeric geocode; unmatched geocodes get 0.
		clonevar Geo = geocode
		destring Geo, replace

		merge m:1 Geo using "$stata_data_location/EastGeos.dta", keep(master matched) nogen
		drop Geo

		replace East = 0 if East==.

		compress

		// Program ids for the ranked choice (chp) and the guaranteed seat (gp),
		// built the same way as pid.
		gen chp = string(ch) + pch + "00" if !missing(ch,pch)

		gen gp = string(guaran) + pguaran + "00" if !missing(guaran,pguaran)

		/* Make a single program level variable */

		// inlist (the variable) = 1 if the assigned program appears anywhere
		// on the student's list. It shares its name with the inlist() function;
		// the parentheses distinguish the two.
		bys studentno : egen inlist = max(pid==chp)

		cap codebook studentno if inlist==0
		codebook studentno if inlist==1

		// N is none of their choices, U is unassigned (typically KG), "D" is administratively assigned.
		// in case of administratively assigned or assigned >5 (assignrun==95), we can't see the program the student was
		// assigned to. It would be bad to take a guess.
		// After 2001, "HA" is hand assigned.
		count if inlist==0 & !inlist(status,"N","U","D") & assignrun!="95"

		display "Year is `y'"
		display "Grade is `g'"

		bys studentno : gen maxrank = _N

		if `y'<=1999 {
			// a few cases where we see assignrun==5 or 11, but sep gives a non-first choice school. fix those
			bys studentno (pref) : replace sep = string(ch[1]) if inlist(assignrun,"05","11") & inlist==0
			bys studentno (pref) : replace sepprog = pch[1] if inlist(assignrun,"05","11") & inlist==0
			// another few cases where assignrun==33: use the second-ranked choice when there is one
			bys studentno (pref) : replace sep = string(ch[2]) if inlist(assignrun,"33") & !missing(ch[2]) & inlist==0
			bys studentno (pref) : replace sepprog = pch[2] if inlist(assignrun,"33") & !missing(pch[2]) & inlist==0
			// I have one person in 1998 K2 who is assignrun==33, but only has one item in their list. I'll interpret this as assignrun==11 (fewest typo explanation).

			bys studentno (pref) : replace sep = string(ch[1]) if inlist(assignrun,"33") & inlist==0 & maxrank==1 & !missing(ch[1])
			bys studentno (pref) : replace sepprog = pch[1] if inlist(assignrun,"33") & inlist==0 & maxrank==1 & !missing(pch[1])

			replace pid = sep + sepprog + "00" if !missing(sep,sepprog)	& inlist(assignrun,"05","11","33") & inlist==0
		}

		// NOTE(review): 2004 is not in the year list above, so this branch
		// never runs as the loop is currently configured.
		if `y'==2004 {
			bys studentno (pref) : replace sep = string(ch[1]) if inlist(assignrun,"05") & inlist==0
			bys studentno (pref) : replace sepprog = pch[1] if inlist(assignrun,"05") & inlist==0
			replace pid = sep + sepprog + "00" if !missing(sep,sepprog)	& inlist(assignrun,"05","11","33") & inlist==0

		}

		// Recompute the on-list flag after the repairs and require that every
		// remaining off-list student is explained by status or assignrun.
		drop inlist
		bys studentno : egen inlist = max(pid==chp)

		count if inlist==0 & !inlist(status,"N","U","D","E","H","HA") & !inlist(assignrun,"95","HA")

		assert r(N)==0

		// Program capacities: pcap = number of students (one row each) holding
		// a given pid.
		// NOTE(review): pcap is counted before the exclusions on the next line,
		// so excluded students who carry a pid still add to that program's
		// count - confirm this is intended.
		preserve
			keep if sfirstone==1
			bys pid sfirstone : gen pcap = _N if !missing(pid)
			drop if inlist(assignrun,"95","HA") | sep=="4840"	| inlist(status,"N","U","D","E","H","HA")	// assignrun==95 is matched to >5 choice. 4840 is unassigned
			// I treat these seats as not existing.
			bys pid : keep if _n==1
			drop if missing(pid)
			keep pid pcap
			save "$stata_data_assignment/`y'/pcap`g'.dta", replace
		restore

		// Geo was dropped after the East merge; re-create it (as a copy of the
		// string geocode) so it is present in the saved file.
		clonevar Geo = geocode

		save "$stata_data_assignment/`y'/StudentIDInfo`g'_modified.dta", replace

		// Rank-order list: one row per student x program, keeping the best
		// rank of any repeated program and renumbering pref consecutively.
		preserve
			keep studentno pref chp
			rename chp pid
			bys studentno pid (pref) : keep if _n==1
			bys studentno (pref) : replace pref = _n
			save "$stata_data_assignment/`y'/rol`g'.dta", replace
		restore

		// choice = pref of the row the student was assigned to (0 if none).
		gen assignedto = pid==chp
		bys studentno : egen choice = max((assignedto==1)*pref)

		// Do not allow a student to have sibling priority if they were not assignrun sibling.

		gen assign_sib = 0
		foreach x in 01 02 03 04 05 13 14 18 19 22 23 26 27 {
			replace assign_sib = 1 if assignrun=="`x'" & year>=2000
		}
		replace assign_sib = 1 if inlist(assignrun,"02","03","04","05","12","13","14","15")==1 & year<=1999
		replace sibling = "" if assign_sib==0

		// Discard the pri variable that came out of the reshape; it is rebuilt
		// from scratch below.
		drop pri

		/* for after 2000 */
		// This section used to sit inside a year-specific if-block (skipping
		// 2005) that has been disabled; it now runs for every year and grade.
		//
		// The assignments below form a cascade: each one only fills rows where
		// pri is still missing, so statement ORDER determines the result. Do
		// not reorder.
		gen double pri = .

		// Guaranteed seat.
		// NOTE(review): the original remark says guarantee priority should only
		// apply at first choice, but this condition does not test pref==1.
		replace pri = 1  if pri==. & (gp==chp & gp!="")  						// according to the codebook, should only receive guarantee priority at first choice.

		// First choice (pref==1).
		replace pri = 2  if pri==. & presch==ch & sibling=="S" & inlist(wch,"W","C")==1 & pref==1
		replace pri = 3  if pri==. &              sibling=="S" & inlist(wch,"W","C")==1 & pref==1
		replace pri = 4  if pri==. & presch==ch & sibling=="S" & inlist(wch,"W","C")!=1 & pref==1
		replace pri = 5  if pri==. &              sibling=="S" & inlist(wch,"W","C")!=1 & pref==1
		replace pri = 8  if pri==. & presch==ch &                inlist(wch,"W","C")==1 & pref==1
		replace pri = 9  if pri==. &                             inlist(wch,"W","C")==1 & pref==1
		replace pri = 10 if pri==. & presch==ch &                inlist(wch,"W","C")!=1 & pref==1

		replace pri = 12 if pri==. &                                                      pref==1

		// Second choice (pref==2). Code 33 (wch=="C" only) is intentionally unused.
		replace pri = 30 if pri==. & sibling=="S" & inlist(wch,"W","C")==1 & pref==2
		replace pri = 31 if pri==. & sibling=="S" & inlist(wch,"W","C")!=1 & pref==2
		replace pri = 32 if pri==. &                inlist(wch,"W","C")==1 & pref==2
		replace pri = 34 if pri==. &                inlist(wch,"W","C")!=1 & pref==2

		// Third choice (pref==3).
		replace pri = 40 if pri==. & sibling=="S" & inlist(wch,"W","C")==1 & pref==3
		replace pri = 41 if pri==. & sibling=="S" & inlist(wch,"W","C")!=1 & pref==3
		replace pri = 42 if pri==. &                inlist(wch,"W","C")==1 & pref==3
		replace pri = 43 if pri==. &                inlist(wch,"W","C")!=1 & pref==3

		// Fourth choice (pref==4).
		replace pri = 50 if pri==. & sibling=="S" & inlist(wch,"W","C")==1 & pref==4
		replace pri = 51 if pri==. & sibling=="S" & inlist(wch,"W","C")!=1 & pref==4
		replace pri = 52 if pri==. &                inlist(wch,"W","C")==1 & pref==4
		replace pri = 53 if pri==. &                inlist(wch,"W","C")!=1 & pref==4

		// Fifth choice (pref==5).
		replace pri = 60 if pri==. & sibling=="S" & inlist(wch,"W","C")==1 & pref==5
		replace pri = 61 if pri==. & sibling=="S" & inlist(wch,"W","C")!=1 & pref==5
		replace pri = 62 if pri==. &                inlist(wch,"W","C")==1 & pref==5
		replace pri = 63 if pri==. &                inlist(wch,"W","C")!=1 & pref==5

		replace pri = 71 if pri==. & pref>=6 & pref!=.	  // just for 0405 file.

		// There are some cases (2005 at least) where students receive assignrun==01 and status=="G" at guaran, presch even though did not rank it first.
		// NOTE(review): string(guaran) is used as a regular expression. When
		// guaran is missing it becomes ".", which matches any non-empty pid -
		// confirm whether a !missing(guaran) guard is wanted here.
		replace pri = 1 if assignrun=="01" & pid==chp & regexm(pid,string(guaran))

		// I have some students who need walk zone priority to rationalize the match, and who were assigned by priority 09, which indicates they had walk zone priority.
		// NOTE(review): year==2005 cannot occur with the current year list, so
		// this line changes nothing as configured.
		replace pri = 9 if assignrun=="09" & year==2005 & sibling=="" & assignedto==1

		/* Adjust for east boston school priority */

		// Flag choices that are East Boston schools.
		// NOTE(review): east (lower case) is not used or saved anywhere below;
		// the pri file keeps East (from the geocode merge) instead. Confirm
		// whether east was meant to feed into pri or into the saved file.
		gen east = 0
		replace east = 1 if ch==4360 // McKay K-8
		replace east = 1 if ch==4322 // Otis Elementary
		replace east = 1 if ch==4450 // East Boston EEC
		replace east = 1 if ch==1070 // East Boston High
		replace east = 1 if ch==4061 // Guild Elementary
		replace east = 1 if ch==4541 // Kennedy Patrick Elem
		replace east = 1 if ch==4543 // O'Donnell Elementary
		replace east = 1 if ch==2050 // Umana Middle
		replace east = 1 if ch==4062 // Bradley Elementary
		replace east = 1 if ch==4361 // Adams Elementary??????????
		replace east = 1 if ch==4323 // Mario Umana??

		// Priority file: one row per student x program. Where a student lists
		// the same program more than once, duplicates drop keeps the first row
		// in the current sort order.
		preserve
			keep chp studentno pri random bilingual East
			duplicates drop studentno chp, force
			rename chp pid
			rename pri prirank
			save "$stata_data_assignment/`y'/pri`g'.dta", replace
		restore

	}
}
